Fish biodiversity in Marine World Heritage Sites

This notebook explores fish diversity in Marine World Heritage Sites using OBIS data.

Fish species in WoRMS

First read all WoRMS species. As it’s not possible to get a full species list from the WoRMS or GBIF web services, I’m reading directly from an export provided by WoRMS.

library(dplyr)

worms <- read.csv("worms.txt", sep = "\t", quote = "") %>%
  as_tibble() %>%
  filter(taxonRank == "Species" & taxonomicStatus == "accepted") %>%
  select(scientificName, phylum, class, order) %>%
  mutate(species_fish = ifelse(class %in% c("Actinopterygii", "Coelacanthi", "Elasmobranchii", "Holocephali"), scientificName, NA))

Here I’m using the rredlist package to get all Red List species from IUCN:

library(dplyr)
library(rredlist)

redlist <- tibble()
page <- 0

while (TRUE) {
  res <- rl_sp(page, key = "a936c4f78881e79a326e73c4f97f34a6e7d8f9f9e84342bff73c3ceda14992b9")$result
  if (length(res) == 0) {
    break
  }
  redlist <- bind_rows(redlist, res)
  page <- page + 1
}

redlist <- redlist %>%
  as_tibble() %>%
  filter(category %in% c("CR", "EN", "VU", "EW", "EX"))

Now let’s label Red List species in the worms data frame.

worms <- worms %>%
  mutate(species_vulnerable = ifelse(scientificName %in% redlist$scientific_name, scientificName, NA)) %>%
  mutate(species_vulnerable_fish = ifelse(!is.na(species_fish) & scientificName %in% redlist$scientific_name, scientificName, NA)) 

And calculate some statistics:

library(knitr)

worms %>%
  summarize(
    species = n(),
    fish = length(unique(species_fish)),
    vulnerable = length(unique(species_vulnerable)),
    vulnerable_fish = length(unique(species_vulnerable_fish))
  ) %>%
  kable(format.args = list(big.mark = ","))
species fish vulnerable vulnerable_fish
221,111 19,755 1,374 826

Fish data in OBIS

library(robis)

if (!file.exists("checklist.Rdata")) {
  cl <- checklist(dropped = "include")
  cl <- cl %>%
    filter(is_marine | is_brackish) %>%
    mutate(species_fish = ifelse(superclass == "Pisces", species, NA)) %>%
    mutate(species_vulnerable = ifelse(category %in% c("VU", "EN", "CR"), species, NA)) %>%
    mutate(species_vulnerable_fish = ifelse(superclass == "Pisces" & category %in% c("VU", "EN", "CR"), species, NA))
  save(cl, file = "checklist.Rdata")
} else {
  load("checklist.Rdata")
}

cl %>%
  summarize(
    records = sum(records),
    species = length(unique(species)),
    fish = length(unique(species_fish)),
    vulnerable = length(unique(species_vulnerable)),
    vulnerable_fish = length(unique(species_vulnerable_fish))
  ) %>%
  kable(format.args = list(big.mark = ","))
records species fish vulnerable vulnerable_fish
73,980,477 134,463 16,923 1,035 604

Fish data for the Marine World Heritage Sites

Fetch spatial data

First fetch the Marine World Heritage Sites shapefile from MarineRegions:

library(sf)
library(dplyr)

if (!file.exists("shape/worldheritagemarineprogramme.shp")) {
  download.file("http://geo.vliz.be/geoserver/wfs?request=getfeature&service=wfs&version=1.0.0&typename=MarineRegions:worldheritagemarineprogramme&outputformat=SHAPE-ZIP", "shape.zip")
  unzip("shape.zip", exdir = "shape")
}

shapes <- st_read("shape/worldheritagemarineprogramme.shp") %>%
  select(full_name, country, geometry) %>%
  mutate(full_name = iconv(full_name, "latin1", "UTF-8"))
## Reading layer `worldheritagemarineprogramme' from data source `/Users/pieter/Desktop/notebook-mwhs/shape/worldheritagemarineprogramme.shp' using driver `ESRI Shapefile'
## Simple feature collection with 129 features and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -180 ymin: -55.00039 xmax: 180 ymax: 71.80583
## geographic CRS: WGS 84

Let’s create a map.

library(mapview)
mapviewOptions(fgb = FALSE)

mapview(shapes)@map

Process spatial data

library(concaveman)
library(sfheaders)

shapes_processed <- shapes %>%
  mutate(geometry = sf_remove_holes(geometry)) %>%
  group_by(full_name) %>%
  summarize(geometry = st_union(geometry))

mapview(shapes, color = "red", lwd = 0.5, alpha = 1, alpha.regions = 0) +
  mapview(shapes_processed, color = "green", lwd = 0.5, alpha = 1, alpha.regions = 0, legend = FALSE)

Fetch occurrence data

Here I’m fetching data from OBIS by area. OBIS is queried by bounding box, but a point-in-polygon calculation is used to discard points outside the areas.

library(purrr)

occ_for_geom <- function(geom) {
  wkt <- st_as_text(st_as_sfc(st_bbox(geom)), digits = 6)
  message(wkt)
  occ <- occurrence(geometry = wkt, fields = c("scientificName", "species", "family", "order", "class", "superclass", "phylum", "decimalLongitude", "decimalLatitude", "date_year", "category", "dataset_id")) %>%
    st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)
  occ_filtered <- occ %>%
    filter(st_intersects(geometry, geom, sparse = FALSE)) %>%
    as_tibble() %>%
    select(-geometry)
  return(occ_filtered)
}

if (!file.exists("occurrence.Rdata")) {
  occs <- map(shapes_processed$geometry, occ_for_geom)
  for (i in 1:nrow(shapes_processed)) {
    occs[[i]]$full_name <- shapes_processed$full_name[i]
  }
  occ <- bind_rows(occs)

  occ <- occ %>%
    mutate(species_fish = ifelse(superclass == "Pisces", species, NA)) %>%
    mutate(species_vulnerable = ifelse(category %in% c("VU", "EN", "CR"), species, NA)) %>%
    mutate(species_vulnerable_fish = ifelse(superclass == "Pisces" & category %in% c("VU", "EN", "CR"), species, NA))

  save(occ, file = "occurrence.Rdata")
} else {
  load("occurrence.Rdata")
}

Calculate statistics

occ %>%
  filter(!is.na(species)) %>%
  summarize(
    records = n(),
    species = length(unique(species)),
    fish = length(unique(species_fish)),
    vulnerable = length(unique(species_vulnerable)),
    vulnerable_fish = length(unique(species_vulnerable_fish))
  ) %>%
  kable(format.args = list(big.mark = ","))
records species fish vulnerable vulnerable_fish
2,788,079 27,569 6,210 427 236
site_stats <- occ %>%
  filter(!is.na(species)) %>%
  group_by(full_name) %>%
  summarize(
    records = n(),
    species = length(unique(species)),
    fish = length(unique(species_fish)),
    vulnerable = length(unique(species_vulnerable)),
    vulnerable_fish = length(unique(species_vulnerable_fish))
  )

site_stats %>%
  kable(format.args = list(big.mark = ","))
full_name records species fish vulnerable vulnerable_fish
Aldabra Atoll 3,429 1,159 503 65 7
Area de ConservaciĂ³n Guanacaste 932 310 307 8 6
Banc d’Arguin National Park 112 40 22 4 3
Belize Barrier Reef Reserve System 8,391 1,077 350 29 23
Brazilian Atlantic Islands: Fernando de Noronha and Atol das Rocas Reserves 945 273 123 9 8
Cocos Island National Park 2,317 499 414 43 42
Coiba National Park and its Special Zone of Marine Protection 2,399 492 450 23 22
East Rennell 13 5 2 1 1
Everglades National Park 59,438 844 230 16 11
GalĂ¡pagos Islands 57,486 1,540 731 71 57
Gough and Inaccessible Islands 2,486 196 46 10 4
Great Barrier Reef 1,247,815 13,728 2,822 200 77
Gulf of Porto: Calanche of Piana, Gulf of Girolata, Scandola Reserve 38 8 2 2 1
Ha Long Bay 2,009 322 320 7 7
Heard and McDonald Islands 38,584 269 26 9 1
High Coast / Kvarken Archipelago 31,605 278 18 2 2
Ibiza, Biodiversity and Culture 92 39 10 3 2
iSimangaliso Wetland Park 14,887 2,165 955 65 51
Islands and Protected Areas of the Gulf of California 10,652 801 615 40 30
Kluane / Wrangell-St Elias / Glacier Bay / Tatshenshini-Alsek 1,126 98 71 4 1
Komodo National Park 766 402 216 8 7
Lagoons of New Caledonia: Reef Diversity and Associated Ecosystems 31,711 4,679 888 35 12
Macquarie Island 239,068 663 90 12 1
Malpelo Fauna and Flora Sanctuary 15,615 1,001 519 53 46
Natural System of Wrangel Island Reserve 156 57 11 3 1
New Zealand Sub-Antarctic Islands 15,142 1,390 122 25 10
Ningaloo Coast 255,510 3,485 928 92 41
Ogasawara Islands 228 147 121 4 4
Papahanaumokuakea 304,473 2,009 577 32 12
Península Valdés 2,939 200 5 2 1
Phoenix Islands Protected Area 2,618 498 369 9 5
Puerto-Princesa Subterranean River National Park 116 102 28 2 2
Rock Islands Southern Lagoon 4,740 888 675 19 13
Shark Bay, Western Australia 9,918 1,349 532 38 14
Shiretoko 258 51 41 2 1
Sian Ka’an 443 111 75 5 3
Socotra Archipelago 4,320 541 65 20 2
St Kilda 3,301 119 17 5 2
Surtsey 4 3 1 1 1
The Wadden Sea 410,953 1,257 99 12 10
Tubbataha Reefs Natural Park 165 31 9 8 4
Ujung Kulon National Park 186 42 2 8 1
West Norwegian Fjords ? Geirangerfjord and Nærøyfjord 10 6 1 1 1
Whale Sanctuary of El Vizcaino 683 114 108 10 10
library(ggplot2)
library(ftplottools)
library(ggrepel)

subset <- site_stats %>%
  filter(full_name %in% c("Great Barrier Reef", "Belize Barrier Reef Reserve System", "Ha Long Bay", "The Wadden Sea", "Heard and McDonald Islands", "Aldabra Atoll")) %>%
  arrange(full_name)

subset$x <- c(600, 100, 800, 500, 10, 60)
subset$y <- c(140, 80, 210, 3, 20, 40)

ggplot() +
  geom_segment(subset, mapping = aes(x = x, y = y, xend = fish, yend = vulnerable)) +
  geom_label(subset, mapping = aes(x = x, y = y, label = full_name), size = 3.5, label.size = 0) +
  geom_point(site_stats, mapping = aes(x = fish, y = vulnerable, size = species, color = species), shape = 21, stroke = 1.2, fill = "white") +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  scale_size(range = c(1, 8), breaks = c(1000, 5000, 10000)) +
  scale_color_viridis_c(end = 0.8, trans = "log10", breaks = c(1000, 5000, 10000)) +
  guides(color = guide_legend(), size = guide_legend()) +
  xlab("fish species") + ylab("vulnerable species") +
  ft_theme() +
  ggtitle("Fish and vulnerable species diversity at marine World Heritate sites")

ggsave("sites.png", width = 10, height = 6, dpi = 600)

Fish DNA barcodes in molecular databases

Let’s check BOLD for barcode sequences using all fish species in WoRMS:

# fixes issue with bold package
invisible(Sys.setlocale("LC_ALL", "C"))

if (!file.exists("bold.Rdata")) {
  fish_species <- worms %>% filter(!is.na(species_fish)) %>% pull(species_fish) %>% unique()
  bold_list <- sapply(fish_species, function(x) NULL)
  for (i in 1:length(bold_list)) {
    message(i, " ", names(bold_list)[i])
    if (is.null(bold_list[[i]])) {
      bold_list[[i]] <- tryCatch({
        caspr::bold_statistics(names(bold_list)[i])
      }, warning = function(warning_condition) {
      }, error = function(error_condition) {
      }, finally = {
      })
    }
  }
  save(bold_list, file = "bold.Rdata")
} else {
  load("bold.Rdata")
}

sequence_numbers <- unlist(map(bold_list, nrow))
fish_sequences <- tibble(species = names(sequence_numbers), sequences = sequence_numbers) %>%
  filter(sequences > 0)

This tells us that barcode sequences exist in BOLD for 9,609 WoRMS fish species.

Now we can check how many fish species reported in the marine World Heritage sites have barcodes in BOLD:

occ %>%
  filter(species %in% fish_sequences$species) %>%
  summarize(species = length(unique(species))) %>%
  kable(format.args = list(big.mark = ","))
species
4,552